Regression modeling
with Guediawaye Census data
#"hh_size",
predictors = c("menage","Aucun","Primair","Moyen","Secondr","fertlty","Ltrcy_F","nbr_cm_h","nbr_cm_f","pct_cm_f")
outcome = "MPI_men"
Inspecting the
outcome variable (MPI) with visualization
mhv_map <- ggplot(MPI_data_dr_2013, aes(fill = MPI_men)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
theme_void() +
labs(fill = "MPI ")
mhv_histogram <- ggplot(MPI_data_dr_2013, aes(x = MPI_men)) +
geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
bins = 100) +
theme_minimal() +
scale_x_continuous(labels = scales::label_number_si(accuracy = 0.1)) +
labs(x = "MPI")
mhv_map + mhv_histogram + labs(title = "MPI value charts for Guediawaye Census 2013")

MPI_data_dr %>%
ggplot(aes(fill = MPI_men)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
theme_void() +
labs(fill = "MPI ")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

ggplot(MPI_data_dr, aes(x = MPI_men)) +
geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
bins = 100) +
theme_minimal() +
scale_x_continuous(labels = scales::label_number_si(accuracy = 0.1)) +
labs(x = "MPI")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

mhv_map_log <- ggplot(MPI_data_dr_2013, aes(fill = log(MPI_men))) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
theme_void() +
labs(fill = "MPI\nvalue (log)")
mhv_histogram_log <- ggplot(MPI_data_dr_2013, aes(x = log(MPI_men))) +
geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
bins = 100) +
theme_minimal() +
scale_x_continuous() +
labs(x = "MPI (log)")
mhv_map_log + mhv_histogram_log + labs(title = "Logged MPI value charts for Guediawaye Census 2013")

MPI_data_dr %>%
ggplot(aes(fill = log(MPI_men))) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
theme_void() +
labs(fill = "MPI\nvalue (log)")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

A first regression
model
library(sf)
library(units)
predictors = c("menage","Primair","Moyen","Secondr","Ltrcy_F","pct_cm_f","hh_size","Ltrcy_A","Ltrcy_W","Ltrcy_M","fertlty")
outcome = "MPI_men"
MPI_data_dr_2013_for_model<- MPI_data_dr_2013 %>%
dplyr::select(MPI_men,predictors) %>%
mutate(pop_density = as.numeric(set_units(hh_size / st_area(.), "1/km2"))) %>%
dplyr::select(-hh_size)
##
MPI_data_dr_2002_for_model<- MPI_data_dr_2002 %>%
dplyr::select(MPI_men,predictors) %>%
mutate(pop_density = as.numeric(set_units(hh_size / st_area(.), "1/km2"))) %>%
dplyr::select(-hh_size)
formula <- "log(MPI_men) ~ menage + Primair + Moyen + Secondr + Ltrcy_F + pct_cm_f + pop_density + Ltrcy_A + Ltrcy_W + Ltrcy_M + fertlty"
model_2013 <- lm(formula = formula, data = MPI_data_dr_2013_for_model)
summary(model_2013)
##
## Call:
## lm(formula = formula, data = MPI_data_dr_2013_for_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.96596 -0.15218 0.01508 0.16678 0.91556
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.206e+00 7.882e-02 -27.993 < 2e-16 ***
## menage 8.920e-03 8.090e-04 11.026 < 2e-16 ***
## Primair 4.446e-05 3.742e-04 0.119 0.905487
## Moyen -4.062e-03 8.660e-04 -4.690 3.69e-06 ***
## Secondr -5.420e-03 1.042e-03 -5.202 3.07e-07 ***
## Ltrcy_F -1.080e-03 2.616e-04 -4.129 4.40e-05 ***
## pct_cm_f -2.412e-01 1.605e-01 -1.503 0.133712
## pop_density 3.318e-06 8.565e-07 3.874 0.000124 ***
## Ltrcy_A 8.242e-04 3.680e-04 2.240 0.025632 *
## Ltrcy_W 3.518e-04 3.775e-04 0.932 0.351926
## Ltrcy_M -5.072e-03 1.121e-02 -0.452 0.651151
## fertlty 1.192e-04 7.926e-05 1.504 0.133383
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2763 on 423 degrees of freedom
## Multiple R-squared: 0.5892, Adjusted R-squared: 0.5786
## F-statistic: 55.16 on 11 and 423 DF, p-value: < 2.2e-16
###
model_2002 <- lm(formula = formula, data = MPI_data_dr_2002_for_model)
summary(model_2002)
##
## Call:
## lm(formula = formula, data = MPI_data_dr_2002_for_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.93813 -0.14645 0.02431 0.15303 0.92895
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.501e+00 9.538e-02 -26.218 < 2e-16 ***
## menage 5.085e-03 6.443e-04 7.893 8.64e-14 ***
## Primair -5.171e-04 9.546e-04 -0.542 0.588466
## Moyen -4.773e-03 1.305e-03 -3.659 0.000308 ***
## Secondr -3.697e-03 1.733e-03 -2.134 0.033829 *
## Ltrcy_F 6.199e-04 1.007e-03 0.615 0.538816
## pct_cm_f 9.399e-02 2.789e-01 0.337 0.736438
## pop_density -7.584e-07 8.747e-07 -0.867 0.386742
## Ltrcy_A 3.455e-04 9.738e-05 3.548 0.000461 ***
## Ltrcy_W 2.135e-03 7.067e-04 3.021 0.002773 **
## Ltrcy_M -1.789e-02 1.211e-02 -1.477 0.140974
## fertlty 4.070e-05 5.002e-05 0.814 0.416656
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2598 on 256 degrees of freedom
## Multiple R-squared: 0.6362, Adjusted R-squared: 0.6205
## F-statistic: 40.69 on 11 and 256 DF, p-value: < 2.2e-16
library(corrr)
dfw_estimates_2013 <- MPI_data_dr_2013_for_model%>%
select(-MPI_men) %>%
st_drop_geometry()
correlations <- correlate(dfw_estimates_2013, method = "pearson")
network_plot(correlations) + labs(title = "Network plot of correlations between model predictors for Guediawaye Census 2013")

##
dfw_estimates_2002 <- MPI_data_dr_2002_for_model%>%
select(-MPI_men) %>%
st_drop_geometry()
correlations <- correlate(dfw_estimates_2002, method = "pearson")
network_plot(correlations) + labs(title = "Network plot of correlations between model predictors for Guediawaye Census 2002")

library(car)
vif(model_2013)
## menage Primair Moyen Secondr Ltrcy_F pct_cm_f
## 3.871362 6.365508 8.106130 6.015662 7.975025 1.078787
## pop_density Ltrcy_A Ltrcy_W Ltrcy_M fertlty
## 1.311097 1.753157 1.482201 1.495714 5.367797
vif(model_2002)
## menage Primair Moyen Secondr Ltrcy_F pct_cm_f
## 3.264182 296.965177 62.395236 23.508009 676.277234 1.192709
## pop_density Ltrcy_A Ltrcy_W Ltrcy_M fertlty
## 5.102300 4.317702 2.812754 2.154419 12.827831
Dimension reduction
with principal components analysis
pca_2013 <- prcomp(
formula = ~.,
data = dfw_estimates_2013,
scale. = TRUE,
center = TRUE
)
summary(pca_2013)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.1830 1.2702 1.1991 1.0093 0.86575 0.70859 0.63546
## Proportion of Variance 0.4332 0.1467 0.1307 0.0926 0.06814 0.04565 0.03671
## Cumulative Proportion 0.4332 0.5799 0.7106 0.8032 0.87134 0.91698 0.95369
## PC8 PC9 PC10 PC11
## Standard deviation 0.46543 0.35450 0.28979 0.28829
## Proportion of Variance 0.01969 0.01142 0.00763 0.00756
## Cumulative Proportion 0.97339 0.98481 0.99244 1.00000
##
pca_2002 <- prcomp(
formula = ~.,
data = dfw_estimates_2002,
scale. = TRUE,
center = TRUE
)
summary(pca_2002)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.4838 1.2668 1.1312 0.89900 0.69049 0.5128 0.42401
## Proportion of Variance 0.5608 0.1459 0.1163 0.07347 0.04334 0.0239 0.01634
## Cumulative Proportion 0.5608 0.7067 0.8231 0.89653 0.93988 0.9638 0.98012
## PC8 PC9 PC10 PC11
## Standard deviation 0.34342 0.24906 0.19413 0.03123
## Proportion of Variance 0.01072 0.00564 0.00343 0.00009
## Cumulative Proportion 0.99085 0.99649 0.99991 1.00000
pca_2013_tibble <- pca_2013$rotation %>%
as_tibble(rownames = "predictor")
pca_2013_tibble
## # A tibble: 11 × 12
## predictor PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 menage 0.411 0.0273 0.0207 -0.00164 -0.116 0.171 -0.100 -0.860
## 2 Primair 0.391 0.00141 0.292 -0.134 -0.0930 0.278 -0.182 0.321
## 3 Moyen 0.423 0.0591 -0.169 -0.0299 -0.123 -0.105 0.0881 0.292
## 4 Secondr 0.359 0.0776 -0.404 0.0668 -0.0638 -0.350 0.242 -0.0563
## 5 Ltrcy_F 0.419 -0.0249 -0.140 0.0606 0.118 -0.310 0.179 0.148
## 6 pct_cm_f -0.0501 -0.160 0.0501 -0.944 0.0275 -0.226 0.135 -0.0901
## 7 Ltrcy_A 0.187 -0.242 0.385 0.128 0.804 -0.193 -0.00962 -0.0681
## 8 Ltrcy_W 0.00824 -0.670 -0.121 0.0918 -0.249 -0.329 -0.597 0.0156
## 9 Ltrcy_M 0.00631 -0.670 -0.169 0.0737 -0.0180 0.453 0.556 0.0105
## 10 fertlty 0.397 -0.0175 0.210 -0.142 -0.0923 0.369 -0.195 0.177
## 11 pop_densi… 0.00232 -0.0769 0.682 0.178 -0.477 -0.355 0.370 -0.0461
## # ℹ 3 more variables: PC9 <dbl>, PC10 <dbl>, PC11 <dbl>
###
pca_2002_tibble <- pca_2002$rotation %>%
as_tibble(rownames = "predictor")
pca_2002_tibble
## # A tibble: 11 × 12
## predictor PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 menage 0.317 -0.151 0.0769 1.55e-1 0.743 -0.382 0.0984 -0.336
## 2 Primair 0.386 -0.0978 -0.0903 1.81e-2 0.0918 0.125 0.0852 0.551
## 3 Moyen 0.366 0.0739 0.250 2.22e-1 -0.183 0.0866 -0.0602 0.0991
## 4 Secondr 0.259 0.199 0.520 3.77e-1 -0.233 0.163 -0.0439 -0.402
## 5 Ltrcy_F 0.394 -0.0114 0.0839 1.19e-1 -0.0360 0.120 0.0225 0.326
## 6 pct_cm_f -0.0470 -0.168 -0.610 7.51e-1 -0.0978 0.0682 -0.0991 -0.0936
## 7 Ltrcy_A 0.342 -0.0716 -0.174 -2.33e-1 -0.308 -0.483 -0.664 -0.112
## 8 Ltrcy_W 0.190 0.587 -0.273 5.99e-4 -0.231 -0.446 0.540 -0.0125
## 9 Ltrcy_M 0.0267 0.708 -0.206 -4.48e-2 0.388 0.349 -0.425 -0.00843
## 10 fertlty 0.373 -0.141 -0.163 -1.39e-1 0.141 0.134 0.0552 0.0928
## 11 pop_density 0.315 -0.157 -0.316 -3.62e-1 -0.150 0.461 0.226 -0.525
## # ℹ 3 more variables: PC9 <dbl>, PC10 <dbl>, PC11 <dbl>
pca_2013_tibble %>%
select(predictor:PC5) %>%
pivot_longer(PC1:PC5, names_to = "component", values_to = "value") %>%
ggplot(aes(x = value, y = predictor)) +
geom_col(fill = "darkgreen", color = "darkgreen", alpha = 0.5) +
facet_wrap(~component, nrow = 1) +
labs(y = NULL, x = "Value", title = " Loadings for first five principal components for Guediawaye Census 2013") +
theme_minimal()

pca_2002_tibble %>%
select(predictor:PC5) %>%
pivot_longer(PC1:PC5, names_to = "component", values_to = "value") %>%
ggplot(aes(x = value, y = predictor)) +
geom_col(fill = "darkgreen", color = "darkgreen", alpha = 0.5) +
facet_wrap(~component, nrow = 1) +
labs(y = NULL, x = "Value", title = " Loadings for first five principal components for Guediawaye Census 2002") +
theme_minimal()

components_2013 <- predict(pca_2013, dfw_estimates_2013)
dfw_pca_2013 <- MPI_data_dr_2013_for_model%>%
select(MPI_men) %>%
cbind(components_2013)
ggplot(dfw_pca_2013, aes(fill = PC1)) +
geom_sf(color = NA) +
labs(title = "Map of principal component 1 for Guediawaye Census 2013") +
theme_void() +
scale_fill_viridis_c()

components_2002 <- predict(pca_2002, dfw_estimates_2002)
dfw_pca_2002 <- MPI_data_dr_2002_for_model%>%
select(MPI_men) %>%
cbind(components_2002)
ggplot(dfw_pca_2002, aes(fill = PC1)) +
geom_sf(color = NA) +
labs(title = "Map of principal component 1 for Guediawaye Census 2002") +
theme_void() +
scale_fill_viridis_c()

dfw_pca_2002$RGPH<-"2002"
dfw_pca_2013$RGPH<-"2013"
dfw_pca <- dfw_pca_2002 %>%
plyr::rbind.fill(dfw_pca_2013) %>%
st_as_sf()
dfw_pca %>%
ggplot(aes(fill = PC1)) +
# Adding spatial features to the plot
geom_sf(color = NA) +
scale_fill_viridis_c()+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

dfw_pca %>%
ggplot(aes(fill = PC2)) +
# Adding spatial features to the plot
geom_sf(color = NA) +
scale_fill_viridis_c()+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

pca_2013_formula <- paste0("log(MPI_men) ~ ",
paste0('PC', 1:6, collapse = ' + '))
pca_2013_model <- lm(formula = pca_2013_formula, data = dfw_pca_2013)
summary(pca_2013_model)
##
## Call:
## lm(formula = pca_2013_formula, data = dfw_pca_2013)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.24990 -0.18273 0.02651 0.20476 1.04093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.198765 0.015293 -143.777 < 2e-16 ***
## PC1 -0.053704 0.007014 -7.657 1.28e-13 ***
## PC2 -0.028563 0.012053 -2.370 0.0182 *
## PC3 0.190385 0.012769 14.910 < 2e-16 ***
## PC4 0.012906 0.015170 0.851 0.3954
## PC5 -0.024567 0.017685 -1.389 0.1655
## PC6 0.160993 0.021607 7.451 5.17e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.319 on 428 degrees of freedom
## Multiple R-squared: 0.4461, Adjusted R-squared: 0.4384
## F-statistic: 57.46 on 6 and 428 DF, p-value: < 2.2e-16
###
pca_2002_formula <- paste0("log(MPI_men) ~ ",
paste0('PC', 1:6, collapse = ' + '))
pca_2002_model <- lm(formula = pca_2002_formula, data = dfw_pca_2002)
summary(pca_2002_model)
##
## Call:
## lm(formula = pca_2002_formula, data = dfw_pca_2002)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.85582 -0.14204 0.02176 0.16668 0.90056
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.372798 0.016131 -147.099 < 2e-16 ***
## PC1 -0.034241 0.006507 -5.263 2.97e-07 ***
## PC2 -0.077437 0.012757 -6.070 4.49e-09 ***
## PC3 -0.184662 0.014286 -12.926 < 2e-16 ***
## PC4 -0.117479 0.017976 -6.535 3.31e-10 ***
## PC5 0.210540 0.023405 8.996 < 2e-16 ***
## PC6 -0.253701 0.031516 -8.050 2.94e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2641 on 261 degrees of freedom
## Multiple R-squared: 0.6168, Adjusted R-squared: 0.608
## F-statistic: 70.01 on 6 and 261 DF, p-value: < 2.2e-16
Spatial
regression
MPI_data_dr_2013_for_model$residuals <- residuals(model_2013)
ggplot(MPI_data_dr_2013_for_model, aes(x = residuals)) +
geom_histogram(bins = 100, alpha = 0.5, color = "navy",
fill = "navy") +
labs(title = "Distribution of model residuals for Guediawaye Census 2013") +
theme_minimal()

MPI_data_dr_2002_for_model$residuals <- residuals(model_2002)
ggplot(MPI_data_dr_2002_for_model, aes(x = residuals)) +
geom_histogram(bins = 100, alpha = 0.5, color = "navy",
fill = "navy") +
labs(title = "Distribution of model residuals for Guediawaye Census 2002") +
theme_minimal()

MPI_data_dr_for_model <- MPI_data_dr_2002_for_model %>%
dplyr::mutate(RGPH = "2002") %>%
plyr::rbind.fill(MPI_data_dr_2013_for_model %>%
dplyr::mutate(RGPH = "2013"))
ggplot(MPI_data_dr_for_model, aes(x = residuals)) +
geom_histogram(bins = 100, alpha = 0.5, color = "navy",
fill = "navy") +
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

library(spdep)
wts_2013 <- MPI_data_dr_2013_for_model %>%
poly2nb() %>%
nb2listw()
moran.test(MPI_data_dr_2013_for_model$residuals, wts_2013)
##
## Moran I test under randomisation
##
## data: MPI_data_dr_2013_for_model$residuals
## weights: wts_2013
##
## Moran I statistic standard deviate = 4.7138, p-value = 1.216e-06
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.1325984738 -0.0023041475 0.0008190204
wts_2002 <- MPI_data_dr_2002_for_model %>%
poly2nb() %>%
nb2listw()
moran.test(MPI_data_dr_2002_for_model$residuals, wts_2002)
##
## Moran I test under randomisation
##
## data: MPI_data_dr_2002_for_model$residuals
## weights: wts_2002
##
## Moran I statistic standard deviate = 2.1922, p-value = 0.01418
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.077244085 -0.003745318 0.001364879
MPI_data_dr_2013_for_model$lagged_residuals <- lag.listw(wts_2013, MPI_data_dr_2013_for_model$residuals)
ggplot(MPI_data_dr_2013_for_model, aes(x = residuals, y = lagged_residuals)) +
theme_minimal() +
labs(title = "Moran scatterplot of residual spatial autocorrelation for Guediawaye Census 2013") +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red")

MPI_data_dr_2002_for_model$lagged_residuals <- lag.listw(wts_2002, MPI_data_dr_2002_for_model$residuals)
ggplot(MPI_data_dr_2002_for_model, aes(x = residuals, y = lagged_residuals)) +
theme_minimal() +
labs(title = "Moran scatterplot of residual spatial autocorrelation for Guediawaye Census 2002") +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red")

MPI_data_dr_for_model <- MPI_data_dr_2002_for_model %>%
dplyr::mutate(RGPH = "2002") %>%
plyr::rbind.fill(MPI_data_dr_2013_for_model %>%
dplyr::mutate(RGPH = "2013"))
ggplot(MPI_data_dr_for_model, aes(x = residuals, y = lagged_residuals)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

Geographically weighted
regression
Choosing a bandwidth
for GWR
library(GWmodel)
library(sf)
dfw_data_sp_2013 <- MPI_data_dr_2013_for_model%>%
as_Spatial()
bw <- bw.gwr(
formula = formula,
data = dfw_data_sp_2013,
kernel = "bisquare",
adaptive = TRUE
)
## Adaptive bandwidth: 276 CV score: 33.03409
## Adaptive bandwidth: 179 CV score: 32.93048
## Adaptive bandwidth: 117 CV score: 34.29743
## Adaptive bandwidth: 215 CV score: 32.81589
## Adaptive bandwidth: 240 CV score: 32.88213
## Adaptive bandwidth: 202 CV score: 32.79958
## Adaptive bandwidth: 191 CV score: 32.84516
## Adaptive bandwidth: 205 CV score: 32.78288
## Adaptive bandwidth: 211 CV score: 32.81568
## Adaptive bandwidth: 205 CV score: 32.78288
###
dfw_data_sp_2002 <- MPI_data_dr_2002_for_model%>%
as_Spatial()
bw <- bw.gwr(
formula = formula,
data = dfw_data_sp_2002,
kernel = "bisquare",
adaptive = TRUE
)
## Adaptive bandwidth: 173 CV score: 30.78351
## Adaptive bandwidth: 115 CV score: 22.6392
## Adaptive bandwidth: 78 CV score: 18.02988
## Adaptive bandwidth: 56 CV score: 20.33301
## Adaptive bandwidth: 92 CV score: 18.84602
## Adaptive bandwidth: 69 CV score: 18.22104
## Adaptive bandwidth: 83 CV score: 18.02752
## Adaptive bandwidth: 87 CV score: 18.63179
## Adaptive bandwidth: 81 CV score: 17.89142
## Adaptive bandwidth: 79 CV score: 18.02583
## Adaptive bandwidth: 81 CV score: 17.89142
gw_model_2013 <- gwr.basic(
formula = formula,
data = dfw_data_sp_2013,
bw = bw,
kernel = "bisquare",
adaptive = TRUE
)
###
gw_model_2002 <- gwr.basic(
formula = formula,
data = dfw_data_sp_2002,
bw = bw,
kernel = "bisquare",
adaptive = TRUE
)
names(gw_model_2013)
## [1] "GW.arguments" "GW.diagnostic" "lm" "SDF"
## [5] "timings" "this.call" "Ftests"
##
names(gw_model_2002)
## [1] "GW.arguments" "GW.diagnostic" "lm" "SDF"
## [5] "timings" "this.call" "Ftests"
gw_model_2013_results <- gw_model_2013$SDF %>%
st_as_sf()
names(gw_model_2013_results)
## [1] "Intercept" "menage" "Primair" "Moyen"
## [5] "Secondr" "Ltrcy_F" "pct_cm_f" "pop_density"
## [9] "Ltrcy_A" "Ltrcy_W" "Ltrcy_M" "fertlty"
## [13] "y" "yhat" "residual" "CV_Score"
## [17] "Stud_residual" "Intercept_SE" "menage_SE" "Primair_SE"
## [21] "Moyen_SE" "Secondr_SE" "Ltrcy_F_SE" "pct_cm_f_SE"
## [25] "pop_density_SE" "Ltrcy_A_SE" "Ltrcy_W_SE" "Ltrcy_M_SE"
## [29] "fertlty_SE" "Intercept_TV" "menage_TV" "Primair_TV"
## [33] "Moyen_TV" "Secondr_TV" "Ltrcy_F_TV" "pct_cm_f_TV"
## [37] "pop_density_TV" "Ltrcy_A_TV" "Ltrcy_W_TV" "Ltrcy_M_TV"
## [41] "fertlty_TV" "Local_R2" "geometry"
###
gw_model_2002_results <- gw_model_2002$SDF %>%
st_as_sf()
names(gw_model_2002_results)
## [1] "Intercept" "menage" "Primair" "Moyen"
## [5] "Secondr" "Ltrcy_F" "pct_cm_f" "pop_density"
## [9] "Ltrcy_A" "Ltrcy_W" "Ltrcy_M" "fertlty"
## [13] "y" "yhat" "residual" "CV_Score"
## [17] "Stud_residual" "Intercept_SE" "menage_SE" "Primair_SE"
## [21] "Moyen_SE" "Secondr_SE" "Ltrcy_F_SE" "pct_cm_f_SE"
## [25] "pop_density_SE" "Ltrcy_A_SE" "Ltrcy_W_SE" "Ltrcy_M_SE"
## [29] "fertlty_SE" "Intercept_TV" "menage_TV" "Primair_TV"
## [33] "Moyen_TV" "Secondr_TV" "Ltrcy_F_TV" "pct_cm_f_TV"
## [37] "pop_density_TV" "Ltrcy_A_TV" "Ltrcy_W_TV" "Ltrcy_M_TV"
## [41] "fertlty_TV" "Local_R2" "geometry"
ggplot(gw_model_2013_results, aes(fill = Local_R2)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local R-squared values from the GWR model for Guediawaye Census 2013") +
theme_void()

ggplot(gw_model_2002_results, aes(fill = Local_R2)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local R-squared values from the GWR model for Guediawaye Census 2002") +
theme_void()

gw_model_results <- gw_model_2002_results %>%
dplyr::mutate(RGPH = "2002") %>%
plyr::rbind.fill(gw_model_2013_results %>%
dplyr::mutate(RGPH = "2013")) %>%
st_as_sf()
gw_model_results %>%
ggplot(aes(fill = Local_R2)) +
# Adding spatial features to the plot
geom_sf() +
scale_fill_viridis_c() +
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

ggplot(gw_model_2013_results, aes(fill = menage)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local parameter estimates for household members for Guediawaye Census 2013") +
theme_void() +
labs(fill = "Local β for \nHH")

ggplot(gw_model_2002_results, aes(fill = menage)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local parameter estimates for household members for Guediawaye Census 2002") +
theme_void() +
labs(fill = "Local β for \nHH")

gw_model_results <- gw_model_2002_results %>%
dplyr::mutate(RGPH = "2002") %>%
plyr::rbind.fill(gw_model_2013_results %>%
dplyr::mutate(RGPH = "2013")) %>%
st_as_sf()
gw_model_results %>%
ggplot(aes(fill = menage)) +
# Adding spatial features to the plot
geom_sf() +
scale_fill_viridis_c() +
labs(fill = "Local β for \nHH")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

ggplot(gw_model_2013_results, aes(fill = pop_density)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local parameter estimates for population density for Guediawaye Census 2013") +
theme_void() +
labs(fill = "Local β for \npopulation density")

ggplot(gw_model_2002_results, aes(fill = pop_density)) +
geom_sf(color = NA) +
scale_fill_viridis_c() +
labs(title = "Local parameter estimates for population density for Guediawaye Census 2002") +
theme_void() +
labs(fill = "Local β for \npopulation density")

gw_model_results <- gw_model_2002_results %>%
dplyr::mutate(RGPH = "2002") %>%
plyr::rbind.fill(gw_model_2013_results %>%
dplyr::mutate(RGPH = "2013")) %>%
st_as_sf()
gw_model_results %>%
ggplot(aes(fill = pop_density)) +
# Adding spatial features to the plot
geom_sf() +
scale_fill_viridis_c() +
labs(fill = "Local β for \npopulation density")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

Classification and
clustering of Guediawaye census data
Geodemographic
classification
set.seed(1994)
dfw_kmeans_2013 <- dfw_pca_2013 %>%
st_drop_geometry() %>%
select(PC1:PC8) %>%
kmeans(centers = 6)
table(dfw_kmeans_2013$cluster)
##
## 1 2 3 4 5 6
## 53 73 139 71 95 4
##
dfw_kmeans_2002 <- dfw_pca_2002 %>%
st_drop_geometry() %>%
select(PC1:PC8) %>%
kmeans(centers = 6)
table(dfw_kmeans_2002$cluster)
##
## 1 2 3 4 5 6
## 72 38 3 3 83 69
dfw_clusters_2013 <- dfw_pca_2013 %>%
mutate(cluster = as.character(dfw_kmeans_2013$cluster))
ggplot(dfw_clusters_2013, aes(fill = cluster)) +
geom_sf(size = 0.1) +
scale_fill_brewer(palette = "Set1") +
labs(title = "Map of geodemographic clusters for Guediawaye Census 2013") +
theme_void() +
labs(fill = "Cluster ")

dfw_clusters_2002 <- dfw_pca_2002 %>%
mutate(cluster = as.character(dfw_kmeans_2002$cluster))
ggplot(dfw_clusters_2002, aes(fill = cluster)) +
geom_sf(size = 0.1) +
scale_fill_brewer(palette = "Set1") +
labs(title = "Map of geodemographic clusters for Guediawaye Census 2002") +
theme_void() +
labs(fill = "Cluster ")

dfw_clusters <- dfw_clusters_2002 %>%
dplyr::mutate(RGPH = "2002") %>%
plyr::rbind.fill(dfw_clusters_2013 %>%
dplyr::mutate(RGPH = "2013")) %>%
st_as_sf()
dfw_clusters %>%
ggplot(aes(fill = cluster)) +
geom_sf(size = 0.1) +
scale_fill_brewer(palette = "Set1") +
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

library(plotly)
cluster_plot <- ggplot(dfw_clusters_2013,
aes(x = PC1, y = PC2, color = cluster)) +
geom_point() +
scale_color_brewer(palette = "Set1") +
labs(title = "Interactive scatterplot of PC1 and PC2 colored by cluster for Guediawaye Census 2013") +
theme_minimal()
ggplotly(cluster_plot) %>%
layout(legend = list(orientation = "h", y = -0.15,
x = 0.2, title = "Cluster"))
cluster_plot <- ggplot(dfw_clusters_2002,
aes(x = PC1, y = PC2, color = cluster)) +
geom_point() +
scale_color_brewer(palette = "Set1") +
labs(title = "Interactive scatterplot of PC1 and PC2 colored by cluster for Guediawaye Census 2002") +
theme_minimal()
ggplotly(cluster_plot) %>%
layout(legend = list(orientation = "h", y = -0.15,
x = 0.2, title = "Cluster"))
Spatial clustering
& regionalization
library(spdep)
input_vars <- dfw_pca_2013 %>%
select(PC1:PC8) %>%
st_drop_geometry() %>%
as.data.frame()
skater_nbrs <- poly2nb(dfw_pca_2013, queen = TRUE)
costs <- nbcosts(skater_nbrs, input_vars)
skater_weights <- nb2listw(skater_nbrs, costs, style = "B")
mst <- mstree(skater_weights)
regions <- skater(
mst[,1:2],
input_vars,
ncuts = 7,
crit = 10
)
dfw_clusters_2013$region <- as.character(regions$group)
ggplot(dfw_clusters_2013, aes(fill = region)) +
geom_sf(size = 0.1) +
labs(title = "Map of contiguous regions derived with the SKATER algorithm for Guediawaye Census 2013") +
scale_fill_brewer(palette = "Set1") +
theme_void()

input_vars <- dfw_pca_2002 %>%
select(PC1:PC8) %>%
st_drop_geometry() %>%
as.data.frame()
skater_nbrs <- poly2nb(dfw_pca_2002, queen = TRUE)
costs <- nbcosts(skater_nbrs, input_vars)
skater_weights <- nb2listw(skater_nbrs, costs, style = "B")
mst <- mstree(skater_weights)
regions <- skater(
mst[,1:2],
input_vars,
ncuts = 7,
crit = 10
)
dfw_clusters_2002$region <- as.character(regions$group)
ggplot(dfw_clusters_2002, aes(fill = region)) +
geom_sf(size = 0.1) +
labs(title = "Map of contiguous regions derived with the SKATER algorithm for Guediawaye Census 2002") +
scale_fill_brewer(palette = "Set1") +
theme_void()

dfw_clusters <- dfw_clusters_2002 %>%
dplyr::mutate(RGPH = "2002") %>%
plyr::rbind.fill(dfw_clusters_2013 %>%
dplyr::mutate(RGPH = "2013")) %>%
st_as_sf()
dfw_clusters %>%
ggplot(aes(fill = region)) +
geom_sf(size = 0.1) +
scale_fill_brewer(palette = "Set1") +
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = c(.92, .03),
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))
